# Load necessary libraries
library(forecast)
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
library(tseries)
‘tseries’ version: 0.10-55
‘tseries’ is a package for time series analysis and computational finance.
See ‘library(help="tseries")’ for details.
library(tidyverse)
── Attaching core tidyverse packages ───────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.1 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.2 ✔ tibble 3.2.1
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.1 ── Conflicts ─────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (http://conflicted.r-lib.org/) to force all conflicts to become errors
library(readr)
library(ggplot2)
library(zoo)
Attaching package: ‘zoo’
The following objects are masked from ‘package:base’:
as.Date, as.Date.numeric
library(TSA)
Registered S3 methods overwritten by 'TSA':
method from
fitted.Arima forecast
plot.Arima forecast
Attaching package: ‘TSA’
The following object is masked from ‘package:readr’:
spec
The following objects are masked from ‘package:stats’:
acf, arima
The following object is masked from ‘package:utils’:
tar
library(rugarch)
Loading required package: parallel
Attaching package: ‘rugarch’
The following object is masked from ‘package:purrr’:
reduce
The following object is masked from ‘package:stats’:
sigma
library(forecast)
library(PerformanceAnalytics)
Loading required package: xts
######################### Warning from 'xts' package ##########################
# #
# The dplyr lag() function breaks how base R's lag() function is supposed to #
# work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
# source() into this session won't work correctly. #
# #
# Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
# conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
# dplyr from breaking base R's lag() function. #
# #
# Code in packages is not affected. It's protected by R's namespace mechanism #
# Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
# #
###############################################################################
Attaching package: ‘xts’
The following objects are masked from ‘package:dplyr’:
first, last
Attaching package: ‘PerformanceAnalytics’
The following objects are masked from ‘package:TSA’:
kurtosis, skewness
The following object is masked from ‘package:graphics’:
legend
library(xts)
library(quantmod)
Loading required package: TTR
# Path to the non-seasonally-adjusted retail sales series (FRED: RSXFSN)
file_path <- "~/Documents/GitHub/MA-641-Course-Project/Scratch Work/RSXFSN.csv"
# Read the CSV with readr; column types are inferred (DATE as date, RSXFSN as dbl)
retail_sales_data <- read_csv(file_path)
Rows: 126 Columns: 2── Column specification ─────────────────────────────────────────────────────────────────────
Delimiter: ","
dbl (1): RSXFSN
date (1): DATE
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert the DATE column to Date type (read_csv already parsed it as a date,
# per the column specification above, so this is a harmless safety step)
retail_sales_data$DATE <- as.Date(retail_sales_data$DATE, format="%Y-%m-%d")
The following dataset is part of the Advance Monthly Retail Trade Survey conducted by the U.S. Census Bureau, retrieved from FRED, and measures the retail trade activity in the United States. It provides an early estimate of monthly sales for retail and food service companies, capturing consumer demand for goods and services.
The RSXFSN data series measures the dollar value of sales and is reported in millions of dollars. This data is not seasonally adjusted and is released monthly. The period of observation for this project spans a roughly 10-year period from January 2014 to June 2024, totaling 126 observations.
# Inspect the first rows and summary statistics of the dataset
head(retail_sales_data)
summary(retail_sales_data)
DATE RSXFSN
Min. :2014-01-01 Min. :337453
1st Qu.:2016-08-08 1st Qu.:405145
Median :2019-03-16 Median :449590
Mean :2019-03-17 Mean :474666
3rd Qu.:2021-10-24 3rd Qu.:554140
Max. :2024-06-01 Max. :668957
# Check the number of rows in the original dataset
num_data_points <- nrow(retail_sales_data)
cat("Number of data points in the original dataset:", num_data_points, "\n")
Number of data points in the original dataset: 126
# Plot the original data: level of monthly retail sales over time
ggplot(data = retail_sales_data, aes(x = DATE, y = RSXFSN)) +
geom_line(color = "blue") +
labs(title = "Time Series of Monthly Retail Sales",
x = "Date",
y = "Retail Sales") +
theme_minimal()
The time series displays a noticeable upward trend in retail sales with periodic spikes in the final months of the year, indicating the presence of seasonality.
# Create a monthly time series object.
# BUG FIX: the data begin in January 2014 (summary above: Min. 2014-01-01), so
# the series must start at c(2014, 1). The previous start = c(2014, 12) shifted
# every observation's month label by one position, which made the seasonal
# boxplot appear to peak in "November" and dip in "December" — those boxes
# actually held December and January data, respectively.
ts_data <- ts(retail_sales_data$RSXFSN, start = c(2014, 1), frequency = 12)
boxplot(ts_data ~ cycle(ts_data), main="Seasonal Boxplot of Monthly Retail Sales", ylab = "Sales", xlab = "Month")
Although it makes sense that the month of November would see peak sales due to Black Friday and Cyber Monday events, it seems counter-intuitive that December would have such a stark decrease in sales, considering the holiday season.
# ACF of the raw series; slow decay here indicates non-stationarity
acf(retail_sales_data$RSXFSN, main = "ACF of Monthly Retail Sales", lag.max = 100)
# PACF of the raw series (wider margins so the title is not clipped)
par(mar=c(5, 5, 4, 2) + 0.1)
pacf(retail_sales_data$RSXFSN, main = "PACF of Monthly Retail Sales", lag.max = 100)
# Augmented Dickey-Fuller test: H0 = unit root (non-stationary)
adf_test_result <- adf.test(retail_sales_data$RSXFSN)
print(adf_test_result)
Augmented Dickey-Fuller Test
data: retail_sales_data$RSXFSN
Dickey-Fuller = -2.4875, Lag order = 4, p-value = 0.3739
alternative hypothesis: stationary
The data shows clear non-stationarity. Therefore, we proceed with differencing measures to achieve stationarity.
# First (regular) difference of the RSXFSN series to remove the trend
diff_series <- diff(retail_sales_data$RSXFSN)
# Build a plotting frame; differencing drops the first observation, hence DATE[-1]
diff_data <- data.frame(DATE = retail_sales_data$DATE[-1], Difference = diff_series)
ggplot(data = diff_data, aes(x = DATE, y = Difference)) +
geom_line(color = "red") +
labs(title = "Differenced Time Series of Monthly Retail Sales",
x = "Date",
y = "Difference in Retail Sales") +
theme_minimal()
# Re-run the ADF test on the differenced series
adf_test_result <- adf.test(diff_series)
Warning: p-value smaller than printed p-value
print(adf_test_result)
Augmented Dickey-Fuller Test
data: diff_series
Dickey-Fuller = -8.0818, Lag order = 4, p-value = 0.01
alternative hypothesis: stationary
p-value from ADF = 0.01 < 0.05 -> stationary
# Log transformation stabilizes the variance; first differencing then removes
# the trend (this is the monthly log-return of retail sales).
log_series <- log(retail_sales_data$RSXFSN)
diff_log_series <- diff(log_series)
# Build a plotting frame; diff() drops the first observation, hence DATE[-1]
diff_log_data <- data.frame(DATE = retail_sales_data$DATE[-1], Difference = diff_log_series)
# BUG FIX: the plot previously used `diff_data` (the plain differenced series)
# instead of `diff_log_data`, so the log-differenced series was never actually
# shown. The title/axis now also reflect the log transformation.
ggplot(data = diff_log_data, aes(x = DATE, y = Difference)) +
  geom_line(color = "red") +
  labs(title = "Log-Differenced Time Series of Monthly Retail Sales",
       x = "Date",
       y = "Difference in Log Retail Sales") +
  theme_minimal()
# Re-test stationarity on the log-differenced series
adf_test_result <- adf.test(diff_log_series)
Warning: p-value smaller than printed p-value
print(adf_test_result)
Augmented Dickey-Fuller Test
data: diff_log_series
Dickey-Fuller = -7.9621, Lag order = 4, p-value = 0.01
alternative hypothesis: stationary
One order of differencing, both with and without taking the logarithmic transformation, achieves stationarity.
# Apply seasonal (lag-12) differencing to the RSXFSN series to remove the
# annual cycle
seasonal_diff_series <- diff(retail_sales_data$RSXFSN, lag = 12)
# Seasonal differencing consumes the first 12 observations, hence DATE[-(1:12)]
seasonal_diff_data <- data.frame(DATE = retail_sales_data$DATE[-(1:12)], Difference = seasonal_diff_series)
# Plot the seasonally differenced data
ggplot(data = seasonal_diff_data, aes(x = DATE, y = Difference)) +
  geom_line(color = "green") +
  labs(title = "Seasonally Differenced Time Series of Monthly Retail Sales",
       x = "Date",
       y = "Seasonal Difference in Retail Sales") +
  theme_minimal()
# Additional first difference on top of the seasonal difference to remove the
# remaining trend. (seasonal_diff_series is reused from above; it was
# previously recomputed here redundantly.)
combined_diff_data <- diff(seasonal_diff_series)
# One seasonal + one regular difference consume the first 13 observations
doubly_diff_data <- data.frame(DATE = retail_sales_data$DATE[-(1:13)], Difference = combined_diff_data)
# Plot the doubly differenced data
ggplot(data = doubly_diff_data, aes(x = DATE, y = Difference)) +
  geom_line(color = "purple") +
  labs(title = "Combined Seasonally Differenced Time Series of Monthly Retail Sales",
       x = "Date",
       y = "Seasonal Difference in Retail Sales") +
  theme_minimal()
# ADF test on the doubly differenced series
adf_test_result <- adf.test(combined_diff_data)
Warning: p-value smaller than printed p-value
print(adf_test_result)
Augmented Dickey-Fuller Test
data: combined_diff_data
Dickey-Fuller = -7.1014, Lag order = 4, p-value = 0.01
alternative hypothesis: stationary
Taking a seasonal difference of the data, and then an additional order of differencing, produces the time series most resembling white noise, albeit with clear volatility between 2020 and 2022. This can perhaps be explained by the COVID-19 pandemic, which saw many store closures and the ceasing of retail activities. We now investigate the ACF and PACF plots with this data.
# ACF of the doubly differenced series; the cut-off pattern guides the choice
# of MA order
acf(combined_diff_data, main="ACF of Differenced RSXFSN", lag.max=50)
# PACF of the same series (wider margins so the title is not clipped)
par(mar=c(5, 5, 4, 2) + 0.1)
pacf(combined_diff_data, main="PACF of Differenced RSXFSN", lag.max=50)
The ACF displays an abrupt cut-off after lag 12, and the PACF gradually decays, which is indicative of a Moving Average model.
# Fit an MA(2) model to the doubly differenced series.
# NOTE(review): the seasonal specification in the code is c(0, 0, 12), not the
# (0,2,12)[12] the old comment claimed. Moreover, combined_diff_data is a plain
# numeric vector (frequency 1), so the seasonal part appears to be ignored —
# the fitted model prints as "ARIMA(0,0,2) with non-zero mean" below. Confirm
# whether a true SARIMA on a ts() object with frequency = 12 was intended.
adjusted_arima_model <- Arima(combined_diff_data, order = c(0, 0, 2), seasonal = c(0, 0, 12))
# Summary of the adjusted ARIMA model
summary(adjusted_arima_model)
Series: combined_diff_data
ARIMA(0,0,2) with non-zero mean
Coefficients:
ma1 ma2 mean
-0.1526 -0.4173 12.7487
s.e. 0.0854 0.0849 749.6410
sigma^2 = 3.38e+08: log likelihood = -1268.63
AIC=2545.26 AICc=2545.63 BIC=2556.17
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -22.10046 18139.64 11832.41 56.0421 156.7261 0.5800389 0.001788122
# Diagnostic checking of the MA(2) model: residual plot, residual ACF,
# histogram, and Ljung-Box test
checkresiduals(adjusted_arima_model)
Ljung-Box test
data: Residuals from ARIMA(0,0,2) with non-zero mean
Q* = 4.2086, df = 8, p-value = 0.8378
Model df: 2. Total lags used: 10
# Use tsdiag to generate diagnostic plots
tsdiag(adjusted_arima_model)
# Fit a richer MA model to the doubly differenced series.
# NOTE(review): the code fits order = c(0, 0, 12) with seasonal = c(0, 0, 10),
# not the ARIMA(2,1,0)(1,1,1)[12] the old comment claimed. As above,
# combined_diff_data has frequency 1, so the seasonal part appears to be
# ignored (the model prints as "ARIMA(0,0,12) with non-zero mean") — confirm.
adjusted_arima_model_2 <- Arima(combined_diff_data, order = c(0, 0, 12), seasonal = c(0, 0, 10))
# Summary of the adjusted ARIMA model
summary(adjusted_arima_model_2)
Series: combined_diff_data
ARIMA(0,0,12) with non-zero mean
Coefficients:
ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma8 ma9 ma10
-0.2489 -0.1880 0.1805 -0.1458 -0.0933 0.2216 -0.1103 -0.1227 0.3371 0.0174
s.e. 0.0937 0.0997 0.1094 0.0942 0.0998 0.1117 0.0939 0.0954 0.1138 0.0935
ma11 ma12 mean
-0.2301 -0.6175 156.1416
s.e. 0.1065 0.1106 285.6890
sigma^2 = 216418099: log likelihood = -1246.91
AIC=2521.81 AICc=2526.1 BIC=2559.99
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 203.7765 13839.09 9898.97 12.37666 183.9934 0.4852594 -0.04652619
# Diagnostic checking of the adjusted model
checkresiduals(adjusted_arima_model_2)
Ljung-Box test
data: Residuals from ARIMA(0,0,12) with non-zero mean
Q* = 11.244, df = 3, p-value = 0.01048
Model df: 12. Total lags used: 15
# Use tsdiag to generate diagnostic plots
tsdiag(adjusted_arima_model_2)
The residuals’ ACF and PACF plots indicate that the model’s residuals are mostly uncorrelated, suggesting a good fit, though the SARIMA(0,0,12)(0,0,10) is slightly better with an AIC of 2521.81 versus 2545.26. While the SARIMA model captures the seasonality and general trend, it may not fully account for the extreme fluctuations seen in the historical data. For better predictions, it might be necessary to explore other models or include additional explanatory variables.
# Monthly log returns of the retail sales series
returns <- diff(log(retail_sales_data$RSXFSN))
# Squared returns serve as a simple proxy for volatility
squared_returns <- returns^2
# Build a plotting frame; diff() drops the first observation, hence DATE[-1]
squared_returns_data <- data.frame(DATE = retail_sales_data$DATE[-1], Squared_Returns = squared_returns)
# Plot the squared returns (this library() call is redundant — ggplot2 is
# already attached at the top of the script — but harmless)
library(ggplot2)
ggplot(data = squared_returns_data, aes(x = DATE, y = Squared_Returns)) +
geom_line(color = "blue") +
labs(title = "Squared Returns of Monthly Retail Sales",
x = "Date",
y = "Squared Returns") +
theme_minimal()
The squared returns of the monthly sales data shows much periodic volatility, which indicates that the data is a strong candidate for the GARCH model.
# Residuals of the ARIMA(0,0,12) model
arima_residuals <- residuals(adjusted_arima_model_2)
# Square the residuals to focus on volatility (ARCH effects)
squared_arima_residuals <- arima_residuals^2
par(mar=c(5, 5, 4, 2) + 0.1)
# ACF of squared residuals: significant spikes indicate volatility clustering
Acf(squared_arima_residuals, main="ACF of Squared Residuals")
# PACF of squared residuals
Pacf(squared_arima_residuals, main="PACF of Squared Residuals")
The ACF of the squared residuals shows significant autocorrelation at various lags, suggesting that periods of high volatility are followed by high volatility and periods of low volatility are followed by low volatility. This pattern is indicative of volatility clustering, a characteristic of financial time series data. The presence of significant autocorrelation in squared residuals also suggests nonlinearity in variance, highlighting the need for models like GARCH.
# Wrap the differenced series in an xts object.
# NOTE(review): combined_diff_data is a plain vector, so time() yields 1..n and
# as.Date() (zoo's numeric method is in scope) turns those into placeholder
# dates, NOT the actual observation dates — confirm whether real dates should
# be attached instead.
combined_xts <- xts(combined_diff_data, order.by = as.Date(time(combined_diff_data)))
# Extract residuals from the fitted ARIMA model
sarima_residuals <- residuals(adjusted_arima_model_2)
# Extract the (placeholder) dates from combined_xts
dates <- index(combined_xts)
# Convert the ARIMA residuals to xts format so rugarch can consume them
sarima_residuals_xts <- xts(sarima_residuals, order.by = dates)
# Define a simple GARCH(1,1) spec with no mean dynamics: the ARIMA step has
# already modeled the mean, so only the conditional variance is modeled here
garch_spec <- ugarchspec(
variance.model = list(model = "sGARCH", garchOrder = c(1, 1)),
mean.model = list(armaOrder = c(0, 0), include.mean = FALSE),
distribution.model = "norm"
)
# Fit the GARCH model on the ARIMA residuals
garch_fit <- ugarchfit(spec = garch_spec, data = sarima_residuals_xts)
Warning:
rugarch-->warning: failed to invert hessian
# Display the GARCH fit. BUG FIX: base summary() on rugarch's S4 uGARCHfit
# object only reports "Length/Class/Mode" (as the transcript below shows);
# show() dispatches rugarch's method, which prints the full estimation results
# (coefficients, robust standard errors, and diagnostic tests).
show(garch_fit)
Length Class Mode
1 uGARCHfit S4
# Evaluate the combined model: re-check the ARIMA residual diagnostics
checkresiduals(adjusted_arima_model_2)
Ljung-Box test
data: Residuals from ARIMA(0,0,12) with non-zero mean
Q* = 11.244, df = 3, p-value = 0.01048
Model df: 12. Total lags used: 15
# rugarch diagnostic panels for the GARCH fit (which = 1..12 selects the panel)
# First two panels: series with conditional SD / VaR limits
plot(garch_fit, which = 1)
plot(garch_fit, which = 2)
please wait...calculating quantiles...
# Remaining rugarch diagnostic panels (3 through 12), drawn in a loop rather
# than one call per panel.
for (panel in 3:12) {
  plot(garch_fit, which = panel)
}
# Standardized residuals from the GARCH fit (residual / conditional SD)
garch_residuals <- residuals(garch_fit, standardize = TRUE)
# Convert GARCH residuals to xts, truncating the date index to match length
garch_residuals_xts <- xts(garch_residuals, order.by = dates[1:length(garch_residuals)]) # Adjust index if necessary
# ACF (and PACF) of the standardized residuals via PerformanceAnalytics
chart.ACFplus(garch_residuals_xts, main = "ACF of Standardized GARCH Residuals")
# 12-month rolling standard deviation to assess dynamic volatility over time
chart.RollingPerformance(garch_residuals_xts, width = 12, FUN = "sd", main = "Rolling Volatility (Standard Deviation)")
The ACF and PACF of the Standardized Residuals show no significant spikes except for the first, indicating that the residuals are uncorrelated and resemble white noise. In the QQ plot, the points fall close to the 45-degree line along the center and top-right-most portion of the graph, albeit with significant deviations at the bottom tail.
# Ljung-Box test p-values for the standardized GARCH residuals at lags 1..20,
# computed functionally instead of filling a preallocated vector in a loop.
max_lags <- 20
p_values <- vapply(
  seq_len(max_lags),
  function(k) Box.test(garch_residuals, lag = k, type = "Ljung-Box", fitdf = 0)$p.value,
  numeric(1)
)
# Plot the p-values against lag, with the 5% significance threshold marked
plot(seq_len(max_lags), p_values, type = "b", pch = 19, col = "blue",
     xlab = "Lag", ylab = "p-value",
     main = "Ljung-Box Test p-values for Standardized Residuals")
abline(h = 0.05, col = "red", lty = 2) # Add a line at the 0.05 significance level
# Data frame of the standardized residuals for later plotting
residuals_df <- data.frame(Residuals = as.numeric(garch_residuals))
# Forecast horizon: 12 months ahead
forecast_horizon <- 12
# Forecast future values using the fitted ARIMA model
sarima_forecast <- forecast(adjusted_arima_model_2, h = forecast_horizon)
# Plot SARIMA forecast
plot(sarima_forecast, main = "SARIMA Forecast")
# Forecast conditional volatility with the GARCH model
garch_forecast <- ugarchforecast(garch_fit, n.ahead = forecast_horizon)
# Extract forecasted volatility (standard deviation) from the GARCH model
volatility_forecast <- sigma(garch_forecast)
# Print volatility forecast
print(volatility_forecast)
# Overlay the GARCH volatility path on the SARIMA mean forecast.
# NOTE(review): volatility_forecast carries its own index, so its placement on
# the forecast plot's time axis should be verified.
plot(sarima_forecast, main = "Forecasted Mean with SARIMA", xlab = "Time", ylab = "Values")
lines(volatility_forecast, col = "red", lty = 2)
legend("topright", legend = c("SARIMA Mean", "GARCH Volatility"), col = c("blue", "red"), lty = 1:2)
# Load the monthly AAPL price data
aapl_monthly_data <- read.csv("~/Documents/GitHub/MA-641-Course-Project/AAPL_Monthly2016.csv")
# Convert the date column to Date type (read.csv imports it as character)
aapl_monthly_data$Date <- as.Date(aapl_monthly_data$Date, format="%Y-%m-%d")
# Inspect the data
head(aapl_monthly_data)
summary(aapl_monthly_data)
About the Data:
# Monthly time series of closing prices, January 2016 through May 2024
aaplmonthly_ts <- ts(aapl_monthly_data$Close, start=c(2016, 01), end = c(2024, 05), frequency=12)
# Descriptive Analysis: level plot, summary statistics, and seasonal boxplot
plot(aaplmonthly_ts, main="Monthly Apple Stock Prices", ylab="Close Price", xlab="Time")
summary(aaplmonthly_ts)
boxplot(aaplmonthly_ts ~ cycle(aaplmonthly_ts), main="Seasonal Boxplot of Monthly Apple Stock Prices", ylab="Close Price")
Time Series Plot:
Summary:
Seasonal Boxplot:
# ACF, PACF, and extended ACF of the raw price series (wider margins so the
# titles are not clipped)
par(mar=c(5, 5, 4, 2) + 0.1)
acf(aaplmonthly_ts, main="ACF of Monthly Apple Stock Prices", lag.max = 72)
pacf(aaplmonthly_ts, main="PACF of Monthly Apple Stock Prices", lag.max = 72)
eacf(aaplmonthly_ts)
ACF Plot:
PACF Plot:
# Augmented Dickey-Fuller test on the price series (H0: unit root)
adf_test <- adf.test(aaplmonthly_ts, alternative="stationary")
print(adf_test)
# If non-stationary at the 5% level, take a first difference and re-test
if (adf_test$p.value > 0.05) {
ts_data_diff <- diff(aaplmonthly_ts, differences=1)
adf_test_diff <- adf.test(ts_data_diff, alternative="stationary")
print(adf_test_diff)
# Replace the working series with its first difference.
# NOTE(review): aaplmonthly_ts now holds price CHANGES, but the plot titles
# below still say "Prices" — consider relabeling.
aaplmonthly_ts <- ts_data_diff
}
# Time Series Plot after Differencing
plot(aaplmonthly_ts, main="Monthly Apple Stock Prices", ylab="Close Price", xlab="Time")
# ACF/PACF/EACF of the (possibly differenced) series, used to pick ARMA orders
par(mar=c(5, 5, 4, 2) + 0.1)
acf(aaplmonthly_ts, main="ACF of Monthly Apple Stock Prices", lag.max = 72)
pacf(aaplmonthly_ts, main="PACF of Monthly Apple Stock Prices", lag.max = 72)
eacf(aaplmonthly_ts)
ACF Plot:
PACF Plot:
EACF Plot:
# Fit an AR(2) model to the (possibly differenced) series
ar_model <- Arima(aaplmonthly_ts, order=c(2,0,0))
summary(ar_model)
# Diagnostics for AR(2): standardized residuals, residual ACF, Ljung-Box
par(mar=c(5, 5, 4, 2) + 0.1)
tsdiag(ar_model, gof.lag = 10, main = "Diagnostics for AR(2)")
checkresiduals(ar_model)
# Q-Q plot to assess normality of the AR(2) residuals
residuals_ar2 <- residuals(ar_model)
qqnorm(residuals_ar2, main = "Q-Q Plot of Residuals for AR(2)")
qqline(residuals_ar2, col = "red")
Q-Q Plot:
Residuals vs. Time Plot:
ACF of Residuals:
Histogram of Residuals:
Ljung-Box Test:
# Fit an MA(2) model to the (possibly differenced) series
ma_model <- Arima(aaplmonthly_ts, order=c(0,0,2))
summary(ma_model)
# Diagnostics for MA(2): standardized residuals, residual ACF, Ljung-Box
par(mar=c(5, 5, 4, 2) + 0.1)
tsdiag(ma_model, gof.lag = 10, main = "Diagnostics for MA(2)")
checkresiduals(ma_model)
# Q-Q plot to assess normality of the MA(2) residuals
residuals_ma2 <- residuals(ma_model)
qqnorm(residuals_ma2, main = "Q-Q Plot of Residuals for MA(2)")
qqline(residuals_ma2, col = "red")
Q-Q Plot:
Residuals vs. Time Plot:
ACF of Residuals:
Histogram of Residuals:
Ljung-Box Test:
# Fit an ARIMA(1,1,1) model (the old comment mislabeled this "ARMA(1,1,1)")
arma_model1 <- Arima(aaplmonthly_ts, order=c(1,1,1))
summary(arma_model1)
# Perform diagnostics for ARIMA(1,1,1)
par(mar=c(5, 5, 4, 2) + 0.1)
tsdiag(arma_model1, gof.lag = 10, main = "Diagnostics for ARIMA(1,1,1)")
checkresiduals(arma_model1)
# Q-Q plot to assess normality of the ARIMA(1,1,1) residuals
residuals_arma111 <- residuals(arma_model1)
qqnorm(residuals_arma111, main = "Q-Q Plot of Residuals for ARIMA(1,1,1)")
qqline(residuals_arma111, col = "red")
Q-Q Plot:
Residuals vs. Time Plot:
ACF of Residuals:
Histogram of Residuals:
Ljung-Box Test:
# Fit an ARIMA(2,1,1) model
arma_model2 <- Arima(aaplmonthly_ts, order=c(2,1,1))
summary(arma_model2)
# Perform diagnostics for ARIMA(2,1,1)
par(mar=c(5, 5, 4, 2) + 0.1)
tsdiag(arma_model2, gof.lag = 10, main = "Diagnostics for ARIMA(2,1,1)")
checkresiduals(arma_model2)
# Q-Q plot to assess normality of the ARIMA(2,1,1) residuals
residuals_arma211 <- residuals(arma_model2)
qqnorm(residuals_arma211, main = "Q-Q Plot of Residuals for ARIMA(2,1,1)")
qqline(residuals_arma211, col = "red")
Q-Q Plot:
Residuals vs. Time Plot:
ACF of Residuals:
Histogram of Residuals:
Ljung-Box Test:
# Fit an ARIMA(2,1,2) model
arma_model3 <- Arima(aaplmonthly_ts, order=c(2,1,2))
summary(arma_model3)
# Perform diagnostics for ARIMA(2,1,2)
par(mar=c(5, 5, 4, 2) + 0.1)
tsdiag(arma_model3, gof.lag = 10, main = "Diagnostics for ARIMA(2,1,2)")
checkresiduals(arma_model3)
# Q-Q plot to assess normality of the ARIMA(2,1,2) residuals
residuals_arma212 <- residuals(arma_model3)
qqnorm(residuals_arma212, main = "Q-Q Plot of Residuals for ARIMA(2,1,2)")
qqline(residuals_arma212, col = "red")
Q-Q Plot:
Residuals vs. Time Plot:
ACF of Residuals:
Histogram of Residuals:
Ljung-Box Test:
# Rebuild the undifferenced price series (aaplmonthly_ts may have been
# replaced by its first difference above)
aaplmonthly_ts2 <- ts(aapl_monthly_data$Close, start=c(2016, 01), end = c(2024, 05), frequency=12)
# Monthly log returns for GARCH modeling
returns <- diff(log(aaplmonthly_ts2))
returns <- returns[!is.na(returns)]
# NOTE(review): despite the title/axis labels, this plots log RETURNS, not
# closing prices
plot(returns, main="Monthly Apple Stock Prices", ylab="Close Price", xlab="Time", type="l")
# GARCH(1,1) with an AR(1) conditional mean
spec_garch <- ugarchspec(variance.model = list(model = "sGARCH", garchOrder = c(1, 1)),
mean.model = list(armaOrder = c(1, 0)),
distribution.model = "norm")
fit_garch <- ugarchfit(spec = spec_garch, data = returns)
# Display the fit summary
fit_garch
# GARCH(1,1) with an ARMA(2,1) conditional mean.
# NOTE: this overwrites spec_garch/fit_garch, so all later diagnostics and
# forecasts use this second specification.
spec_garch <- ugarchspec(variance.model = list(model = "sGARCH", garchOrder = c(1, 1)),
mean.model = list(armaOrder = c(2, 1)),
distribution.model = "norm")
fit_garch <- ugarchfit(spec = spec_garch, data = returns)
# Display the fit summary
fit_garch
Model Fit:
# Draw all twelve rugarch diagnostic panels for the selected GARCH fit,
# looping over the panel index rather than repeating the call twelve times.
for (panel in seq_len(12)) {
  plot(fit_garch, which = panel)
}
News Impact Curve:
ACF of Squared Standardized Residuals:
ACF of Standardized Residuals:
Q-Q Plot:
Empirical Density of Standardized Residuals:
Cross-Correlations of Squared vs. Actual Observations:
ACF of Absolute Observations:
ACF of Squared Observations:
ACF of Observations:
Conditional Standard Deviation vs. Returns:
Series with 2 Conditional SD Superimposed:
Series with 1% VaR Limits:
# Forecasting with the GARCH model: 12 steps ahead
forecast_garch <- ugarchforecast(fit_garch, n.ahead=12)
plot(forecast_garch, which=1) # Forecast series
# Comparing Models using AIC and BIC
models <- list(ar_model, ma_model, arma_model1, arma_model2, arma_model3)
# BUG FIX: "ARIMA(2,1,2" was missing its closing parenthesis
model_names <- c("AR", "MA", "ARIMA(1,1,1)", "ARIMA(2,1,1)", "ARIMA(2,1,2)", "GARCH")
# Pull the GARCH criteria from the fit itself instead of hard-coding values
# transcribed from the printed summary. infocriteria() returns the criteria
# in the order Akaike, Bayes, Shibata, Hannan-Quinn.
# NOTE(review): rugarch normalizes its criteria by sample size, so they are
# not on the same scale as stats::AIC/BIC of the ARIMA fits — compare with care.
garch_ic <- infocriteria(fit_garch)
aic_values <- c(vapply(models, AIC, numeric(1)), garch_ic[1])
bic_values <- c(vapply(models, BIC, numeric(1)), garch_ic[2])
comparison <- data.frame(Model=model_names, AIC=aic_values, BIC=bic_values)
print(comparison)